###############################################################################################
## Multiple Imputation Replication R Code for
## Babies across Borders: The Political Economy of International Child Adoption
## Authors: Asif Efrat, David Leblang, Steven Liao, Sonal S. Pandya
###############################################################################################

###############################################################################################
## CAUTION: the following code is too computationally intensive to run on a personal computer
## We implement parallel MI by submitting 10 jobs to 10 different nodes of a cluster
## For each job we only request 1 multiply imputed dataset using the code below
## Each job is also implemented in parallel within nodes by the number of CPU (cores)
## We combine the 10 multiply imputed datasets once all 10 jobs are complete
## The following code simply demonstrates how to replicate our multiple imputation dataset if one
## could run the code on a single supercomputer
## For further details on Multiple imputation on University of Virginia's ITS Linux cluster with Amelia
## Please see <https://github.com/stevenliaotw/parallel> or contact Steven Liao <stevenliao@virginia.edu>
################################################################################################

# clear all objects in workspace
# rm(list = ls())

# load library
library(Amelia)
# library(parallel)
# library(multicore)
library(foreign)

# Get command line argument
cmdArgs = commandArgs(trailingOnly=TRUE)
inputNum = 1
runNum = as.numeric(cmdArgs[inputNum])
print(paste("Run Number:",as.character(runNum)))

numCores = 16

# set seed
set.seed(1234*runNum)

# load data
load("/Users/stevenliao/Dropbox/international_adoption/final/replication/data/merge.RData")

# run Amelia
data.mi <- amelia(x = data, 
                   m = 10, 
                   idvars = c("iso3c_d", "ifs_d", "cty_d", "iso3c_o", "ifs_o", "cty_o"),
                   ords = c("commonlegal", "commonreligion", "execnat_o_L1", "Mjussolis_o_L1", "Mdualcit_o_L1"),
                   sqrt = c("Iflow_L1", "Istock_L1", "trade_L1", "creg_muslim_o_L1", "nat_disasters_o_L1", "refugees_o_L1", "troops_o_L1", 
                            "us_econ_aid_o_L1", "us_mil_aid_o_L1", "Mvotingrts_o_L1", 
                            "f_labor_par_L1", "f_edu_ratio_L1", "f_parli_seats_L1", "rural_pop_L1", "pop14_per_L1", 
                            "adol_fertility_rate_L1", "fertility_rate_L1", "birth_rate_L1", "death_rate_L1",
                            "f_pop_hiv_L1", "immune_dpt_L1", "immune_measles_L1", "health_expend_L1", "n_mortality_L1", "urban_pop_L1"),
                   logs = c("rgdpe_o_L1", "rgdpo_o_L1", "pwt_pop_o_L1", "rgdppc_exp_L1", "rgdppc_out_L1", "Mresidency_o_L1", 
                            "wdi_pop_tot_L1", "pop14_mil_L1"),
                   lags = c("commonlegal", "commonreligion", 
                            "Iflow_L1", "Istock_L1", "trade_L1",
                            "creg_muslim_o_L1", "nat_disasters_o_L1", "refugees_o_L1", "troops_o_L1", "us_econ_aid_o_L1", "us_mil_aid_o_L1",
                            "gov_effect_o_L1", "reg_quality_o_L1", "rule_law_o_L1", "control_corrupt_o_L1", 
                            "rgdpe_o_L1", "rgdpo_o_L1", "pwt_pop_o_L1", "rgdppc_exp_L1", "rgdppc_out_L1",
                            "execnat_o_L1", "Mjussolis_o_L1", "Mresidency_o_L1", "Mdualcit_o_L1", "Mvotingrts_o_L1", 
                            "f_pop_L1", "f_labor_par_L1", "f_edu_ratio_L1", "f_parli_seats_L1", "rural_pop_L1", "pop14_per_L1",
                            "wdi_pop_tot_L1", "pop14_mil_L1", "adol_fertility_rate_L1", "fertility_rate_L1", "birth_rate_L1", "death_rate_L1", 
                            "f_pop_hiv_L1", "immune_dpt_L1", "immune_measles_L1", "health_expend_L1", "n_mortality_L1", "urban_pop_L1"),
                   leads = c("commonlegal", "commonreligion", 
                             "Iflow_L1", "Istock_L1", "trade_L1",
                             "creg_muslim_o_L1", "nat_disasters_o_L1", "refugees_o_L1", "troops_o_L1", "us_econ_aid_o_L1", "us_mil_aid_o_L1",
                             "gov_effect_o_L1", "reg_quality_o_L1", "rule_law_o_L1", "control_corrupt_o_L1", 
                             "rgdpe_o_L1", "rgdpo_o_L1", "pwt_pop_o_L1", "rgdppc_exp_L1", "rgdppc_out_L1",
                             "execnat_o_L1", "Mjussolis_o_L1", "Mresidency_o_L1", "Mdualcit_o_L1", "Mvotingrts_o_L1", 
                             "f_pop_L1", "f_labor_par_L1", "f_edu_ratio_L1", "f_parli_seats_L1", "rural_pop_L1", "pop14_per_L1",
                             "wdi_pop_tot_L1", "pop14_mil_L1", "adol_fertility_rate_L1", "fertility_rate_L1", "birth_rate_L1", "death_rate_L1", 
                             "f_pop_hiv_L1", "immune_dpt_L1", "immune_measles_L1", "health_expend_L1", "n_mortality_L1", "urban_pop_L1"),
                   cs = "dyad",        
                   ts = "year",
                   empri = .01*nrow(data),
                   parallel = "multicore",
                   ncpus = numCores)

summary(data.mi)

#####################
## Diagnostics
#####################
# density plot comparison
pdf(file = "density.pdf", width = 12, height = 12)
par(mfrow = c(15, 3))
plot(data.mi, which.vars = c(19:62))
dev.off()

# overimputation
pdf(file = "overimpute.pdf", width = 12, height = 60)
par(mfrow=c(15, 3))
overimpute(data.mi, var = "commonlegal") 
overimpute(data.mi, var = "commonreligion") 
overimpute(data.mi, var = "Iflow_L1") 
overimpute(data.mi, var = "Istock_L1")
overimpute(data.mi, var = "trade_L1") 
overimpute(data.mi, var = "creg_muslim_o_L1") 
overimpute(data.mi, var = "nat_disasters_o_L1") 
overimpute(data.mi, var = "refugees_o_L1") 
overimpute(data.mi, var = "troops_o_L1") 
overimpute(data.mi, var = "us_econ_aid_o_L1") 
overimpute(data.mi, var = "us_mil_aid_o_L1") 
overimpute(data.mi, var = "gov_effect_o_L1") 
overimpute(data.mi, var = "reg_quality_o_L1") 
overimpute(data.mi, var = "rule_law_o_L1") 
overimpute(data.mi, var = "control_corrupt_o_L1") 
overimpute(data.mi, var = "rgdpe_o_L1") 
overimpute(data.mi, var = "rgdpo_o_L1") 
overimpute(data.mi, var = "pwt_pop_o_L1") 
overimpute(data.mi, var = "rgdppc_exp_L1") 
overimpute(data.mi, var = "rgdppc_out_L1") 
overimpute(data.mi, var = "execnat_o_L1") 
overimpute(data.mi, var = "Mjussolis_o_L1") 
overimpute(data.mi, var = "Mresidency_o_L1") 
overimpute(data.mi, var = "Mdualcit_o_L1") 
overimpute(data.mi, var = "Mvotingrts_o_L1") 
overimpute(data.mi, var = "f_pop_L1") 
overimpute(data.mi, var = "f_labor_par_L1") 
overimpute(data.mi, var = "f_edu_ratio_L1") 
overimpute(data.mi, var = "f_parli_seats_L1")
overimpute(data.mi, var = "rural_pop_L1")
overimpute(data.mi, var = "pop14_per_L1")
overimpute(data.mi, var = "wdi_pop_tot_L1")
overimpute(data.mi, var = "pop14_mil_L1")
overimpute(data.mi, var = "adol_fertility_rate_L1")
overimpute(data.mi, var = "fertility_rate_L1")
overimpute(data.mi, var = "birth_rate_L1")
overimpute(data.mi, var = "death_rate_L1")
overimpute(data.mi, var = "f_pop_hiv_L1")
overimpute(data.mi, var = "immune_dpt_L1")
overimpute(data.mi, var = "immune_measles_L1")
overimpute(data.mi, var = "health_expend_L1")
overimpute(data.mi, var = "n_mortality_L1")
overimpute(data.mi, var = "urban_pop_L1")
dev.off()

####################################
## post imputation transformation
####################################
# recreate variables
data.mi <- transform(data.mi, pop14_mil_L1 = pop14_per_L1/100*wdi_pop_tot_L1/1000000)
data.mi <- transform(data.mi, rgdppc_exp_L1 = rgdpe_o_L1/pwt_pop_o_L1) 
data.mi <- transform(data.mi, rgdppc_out_L1 = rgdpo_o_L1/pwt_pop_o_L1) 
data.mi <- transform(data.mi, hague_adopt_entry_only_o_L1 = ifelse(hague_entry_o_L1 == 1 & hague_entry_d_L1 == 0, 1, 0))
data.mi <- transform(data.mi, hague_adopt_entry_only_d_L1 = ifelse(hague_entry_o_L1 == 0 & hague_entry_d_L1 == 1, 1, 0))

summary(data.mi)

# save data
save(data.mi, file = "adopt.mi.RData")
write.amelia(data.mi, file.stem = "adopt_mi", extension = ".dta", format = "dta")


